# Use R to produce information similar to that 
# shown on the web page generated at
#  http://courses.wccnet.edu/~palay/math160r/setuplookatsamples.htm

#  First, look at a population that is
#  normally distributed.

#  We will use the gnrnd5 function to generate
#  the initial population of 8000 items so that 
#  we have control over it and can easily generate 
#  the same population at different times and places.

source("../gnrnd5.R")   # presumably defines gnrnd5()
#  To generate different values, change the 34343
#  inside the first argument of the call below.
gnrnd5(434343799904, 1000000000)   # generated values are read from L1 below
source("../pop_sd.R")   # presumably defines pop_sd()
sigma <- pop_sd(L1)     # pop_sd() of the raw values
mu <- mean(L1)          # mean of the raw values

#  Choose the mean and the standard deviation
#  that the rescaled population should have.
desired_mean <- 142      # adjust as desired
desired_sd   <- 13.6     # adjust as desired
#  Standardize the raw values, then scale and shift them.
new_pop <- desired_mean + ((L1 - mu) / sigma) * desired_sd

#  Inspect the rescaled population: both ends, a summary, center, spread.
head(new_pop, n = 12)
tail(new_pop, n = 12)
summary(new_pop)
mean(new_pop)
pop_sd(new_pop)
hist(new_pop)   # histogram with default settings
#  The same data with finer bins and a descriptive title.
hist(new_pop, breaks = 30,
     main = "Our underlying population")
boxplot(new_pop, main = "Our underlying population",
        horizontal = TRUE)
source("../assess_normality.R")   # presumably defines assess_normality()
assess_normality(new_pop)


#  Draw num_samples samples (1000 by default), each of
#  desired_sample_size items, from new_pop and record
#  the mean and the standard deviation of every sample
#  so that both collections can be examined afterwards.

desired_sample_size <- 32  # change this if you want
num_samples <- 1000        # number of samples to draw
#  Note that each time you perform the following
#  lines you will get a different sequence of
#  samples, because sample() draws at random.

my_sample_means <- numeric(num_samples)  # double slots, not index values
my_sample_sds   <- numeric(num_samples)

for (i in seq_len(num_samples)) {
  this_sample <- sample(new_pop, desired_sample_size)
  my_sample_means[i] <- mean(this_sample)
  my_sample_sds[i]   <- sd(this_sample)
}

#  First and last dozen of the recorded sample means.
head(my_sample_means, n = 12)
tail(my_sample_means, n = 12)
#  Observed center and spread of the sample means ...
mean(my_sample_means)
pop_sd(my_sample_means)
#  ... next to the predicted values for comparison.
mean(new_pop)
pop_sd(new_pop) / sqrt(desired_sample_size)

#  Average of the recorded sample standard
#  deviations.
mean(my_sample_sds)

#  Graphical checks of the distribution of the sample means.
hist(my_sample_means,
     main = "Histogram of sample means", breaks = 30)
boxplot(my_sample_means,
        main = "Boxplot of sample means",
        horizontal = TRUE)
assess_normality(my_sample_means)


######################################
##   everything is as we expect.  Feel free to
##   go back and get new samples, start at
##   line 55.
##   Or, feel free to change the sample size
##   by changing line 49 and then running
##   the lines after that.
##   Or feel free to change the mean or 
##   standard deviation of the population by
##   changing lines 23 and/or 24 and then 
##   running the lines after that.
##   Or feel free to change the original 
##   distribution by changing the 5 digit 
##   seed value pointed out in lines 14-16
######################################

###################################
##  Now let us do the same thing, but this time
##  we will start with what is essentially a 
##  uniform distribution.
#  To generate different values, change the 34343
#  inside the first argument of the call below.
gnrnd5(434343799901, 9678394327)   # generated values are read from L1 below

sigma <- pop_sd(L1)     # pop_sd() of the raw values
mu <- mean(L1)          # mean of the raw values

#  Choose the mean and the standard deviation
#  that the rescaled population should have.
desired_mean <- 142      # adjust as desired
desired_sd   <- 13.6     # adjust as desired
#  Standardize the raw values, then scale and shift them.
new_pop <- desired_mean + ((L1 - mu) / sigma) * desired_sd

#  Inspect the rescaled population: both ends, a summary, center, spread.
head(new_pop, n = 12)
tail(new_pop, n = 12)
summary(new_pop)
mean(new_pop)
pop_sd(new_pop)
hist(new_pop)   # histogram with default settings
#  The same data with finer bins and a descriptive title.
hist(new_pop, breaks = 30,
     main = "Our underlying population")
boxplot(new_pop, main = "Our underlying population",
        horizontal = TRUE)
#  assess_normality() is expected to be loaded already by an earlier source().
assess_normality(new_pop)


#  Draw num_samples samples (1000 by default), each of
#  desired_sample_size items, from new_pop and record
#  the mean and the standard deviation of every sample
#  so that both collections can be examined afterwards.

desired_sample_size <- 32  # change this if you want
num_samples <- 1000        # number of samples to draw
#  Note that each time you perform the following
#  lines you will get a different sequence of
#  samples, because sample() draws at random.

my_sample_means <- numeric(num_samples)  # double slots, not index values
my_sample_sds   <- numeric(num_samples)

for (i in seq_len(num_samples)) {
  this_sample <- sample(new_pop, desired_sample_size)
  my_sample_means[i] <- mean(this_sample)
  my_sample_sds[i]   <- sd(this_sample)
}

#  First and last dozen of the recorded sample means.
head(my_sample_means, n = 12)
tail(my_sample_means, n = 12)
#  Observed center and spread of the sample means ...
mean(my_sample_means)
pop_sd(my_sample_means)
#  ... next to the predicted values for comparison.
mean(new_pop)
pop_sd(new_pop) / sqrt(desired_sample_size)

#  Average of the recorded sample standard
#  deviations.
mean(my_sample_sds)

#  Graphical checks of the distribution of the sample means.
hist(my_sample_means,
     main = "Histogram of sample means", breaks = 30)
boxplot(my_sample_means,
        main = "Boxplot of sample means",
        horizontal = TRUE)
assess_normality(my_sample_means)


######################################
##   everything is as we expect.  Feel free to
##   go back and get new samples, start at
##   line 148.
##   Or, feel free to change the sample size
##   by changing line 142 and then running
##   the lines after that.
##   Or feel free to change the mean or 
##   standard deviation of the population by
##   changing lines 116 and/or 117 and then 
##   running the lines after that.
##   Or feel free to change the original 
##   distribution by changing the 5 digit 
##   seed value pointed out in lines 107 to 109
######################################

###################################
##  Now let us do the same thing, but this time
##  we will start with what is essentially a 
##  skewed right distribution.
#  To generate different values, change the 34343
#  inside the first argument of the call below.
gnrnd5(434343799902, 9678394327)   # generated values are read from L1 below

sigma <- pop_sd(L1)     # pop_sd() of the raw values
mu <- mean(L1)          # mean of the raw values

#  Choose the mean and the standard deviation
#  that the rescaled population should have.
desired_mean <- 142      # adjust as desired
desired_sd   <- 13.6     # adjust as desired
#  Standardize the raw values, then scale and shift them.
new_pop <- desired_mean + ((L1 - mu) / sigma) * desired_sd

#  Inspect the rescaled population: both ends, a summary, center, spread.
head(new_pop, n = 12)
tail(new_pop, n = 12)
summary(new_pop)
mean(new_pop)
pop_sd(new_pop)
hist(new_pop)   # histogram with default settings
#  The same data with finer bins and a descriptive title.
hist(new_pop, breaks = 30,
     main = "Our underlying population")
boxplot(new_pop, main = "Our underlying population",
        horizontal = TRUE)
#  assess_normality() is expected to be loaded already by an earlier source().
assess_normality(new_pop)


#  Draw num_samples samples (1000 by default), each of
#  desired_sample_size items, from new_pop and record
#  the mean and the standard deviation of every sample
#  so that both collections can be examined afterwards.

desired_sample_size <- 32  # change this if you want
num_samples <- 1000        # number of samples to draw
#  Note that each time you perform the following
#  lines you will get a different sequence of
#  samples, because sample() draws at random.

my_sample_means <- numeric(num_samples)  # double slots, not index values
my_sample_sds   <- numeric(num_samples)

for (i in seq_len(num_samples)) {
  this_sample <- sample(new_pop, desired_sample_size)
  my_sample_means[i] <- mean(this_sample)
  my_sample_sds[i]   <- sd(this_sample)
}

#  First and last dozen of the recorded sample means.
head(my_sample_means, n = 12)
tail(my_sample_means, n = 12)
#  Observed center and spread of the sample means ...
mean(my_sample_means)
pop_sd(my_sample_means)
#  ... next to the predicted values for comparison.
mean(new_pop)
pop_sd(new_pop) / sqrt(desired_sample_size)

#  Average of the recorded sample standard
#  deviations.
mean(my_sample_sds)

#  Graphical checks of the distribution of the sample means.
hist(my_sample_means,
     main = "Histogram of sample means", breaks = 30)
boxplot(my_sample_means,
        main = "Boxplot of sample means",
        horizontal = TRUE)
assess_normality(my_sample_means)


######################################
##   everything is as we expect.  Feel free to
##   go back and get new samples, start at
##   line 241.
##   Or, feel free to change the sample size
##   by changing line 235 and then running
##   the lines after that.
##   Or feel free to change the mean or 
##   standard deviation of the population by
##   changing lines 209 and/or 210 and then 
##   running the lines after that.
##   Or feel free to change the original 
##   distribution by changing the 5 digit 
##   seed value pointed out in lines 200 to 202
######################################